package util;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.DefaultHttpClient;

/**
 * Static helpers for downloading an HTML page and pulling {@code href}
 * attribute values out of HTML text with regular expressions.
 *
 * <p>All HTTP requests go through a single shared {@link DefaultHttpClient}
 * instance.
 */
public class HTMLUtil {
	private static final DefaultHttpClient httpclient = new DefaultHttpClient();

	/**
	 * Matches {@code href="VALUE"} followed by a space and a word character
	 * (i.e. another attribute follows). The value is captured with a negated
	 * character class so it stops at the first closing quote; a greedy
	 * {@code .+} would run on to the last {@code " x} on the line and swallow
	 * everything in between.
	 */
	private static final Pattern FIRST_HREF =
			Pattern.compile("href=\"([^\"\\r\\n]+)\" \\w");

	/** Matches {@code href="VALUE"} followed by a space; captures VALUE only. */
	private static final Pattern ANY_HREF =
			Pattern.compile("href=\"([^\"\\r\\n]+)\" ");

	/**
	 * Fetches {@code url} with an HTTP GET and returns the response body
	 * decoded as UTF-8, with every line terminated by {@code "\n"}.
	 *
	 * <p>This is best-effort: if the request or the read fails, the failure is
	 * reported on {@code System.err} and whatever was read so far (possibly the
	 * empty string) is returned. The method never returns {@code null}.
	 *
	 * @param url the address to fetch
	 * @return the page text, or a partial/empty string if the fetch failed
	 */
	public static String getHtmlPage(String url){
		HttpUriRequest method = new HttpGet(url);
		StringBuilder page = new StringBuilder();
		try {
			HttpResponse response = httpclient.execute(method);
			BufferedReader in = new BufferedReader(new InputStreamReader(
					response.getEntity().getContent(), "UTF-8"));
			try {
				String line;
				while ((line = in.readLine()) != null) {
					page.append(line).append('\n');
				}
			} finally {
				// Always close the content stream, even when reading fails:
				// closing it is what releases the connection held by the
				// shared client so later requests can proceed.
				in.close();
			}
		} catch (Exception e) {
			// Keep the original "return what we have" contract, but make the
			// failure visible instead of silently discarding it.
			System.err.println("HTMLUtil.getHtmlPage: failed to fetch " + url + ": " + e);
		}
		return page.toString();
	}

	/**
	 * Returns the value of the first {@code href="..."} attribute in
	 * {@code html} that is followed by a space and a word character.
	 *
	 * @param html the HTML text to search; must not be {@code null}
	 * @return the attribute value, or the empty string if nothing matches
	 */
	public static String extractUrl(String html) {
		Matcher matcher = FIRST_HREF.matcher(html);
		return matcher.find() ? matcher.group(1) : "";
	}

	/**
	 * Returns the values of all {@code href="..."} attributes in {@code html}
	 * that are followed by a space, in order of appearance.
	 *
	 * @param html the HTML text to search; must not be {@code null}
	 * @return the attribute values; empty (never {@code null}) if none match
	 */
	public static ArrayList<String> extractUrls(String html){
		ArrayList<String> urls = new ArrayList<String>();
		Matcher matcher = ANY_HREF.matcher(html);
		while (matcher.find()) {
			urls.add(matcher.group(1));
		}
		return urls;
	}
}
